

* Please change the “path” to the working directory to run the code

global path "data"
global path_raw "data/raw"
global path_analysis "data/analysis_data"
global path_output "data/output_tables_figures"

/*----------------------------------------------------------------------------*/

// Construct Controls //

*** Ratio of purchase loans to refinance loans

use "$path_analysis/hmda_purchase_ctn_year_analysis.dta", clear
gen n_purchase=N
keep ctn year n_purchase
merge 1:1 ctn year using "$path_analysis/hmda_refinance_ctn_year_analysis.dta"
keep if _merge==3
rename N n_refinance
gen PurRatio=n_purchase/(n_purchase+n_refinance) // purchase ratio
keep ctn year PurRatio
save "$path_analysis/covariates_purchase_refin_ratio_ctn.dta", replace

/*----------------------------------------------------------------------------*/

*** GDP
import delimited "$path_raw/bea_gdp_data.csv", clear
rename geofips FIPS
replace FIPS=substr(FIPS,3,5)
drop if substr(FIPS,3,3)=="000"
drop if geoname=="United States"
keep if unit=="Thousands of chained 2017 dollars" // real GDP
format FIPS %10s
format geoname %20s

forvalues i=9/31 {
	local j=`i'+(2001-9)
	rename v`i' gdp`j'
}

drop geoname region tablename linecode industryclassification description unit

reshape long gdp, i(FIPS) j(year)
destring gdp, replace force

destring FIPS, gen(FIPS_num)
xtset FIPS_num year

gen gdpg=(gdp-L.gdp)/L.gdp
drop FIPS_num

keep if year>=2007 & year<=2016
save "$path_analysis/covariates_gdp_growth_county.dta", replace

/*----------------------------------------------------------------------------*/

*** Unemployment rate (BLS county-level), 2007-2016
import excel "$path_raw/bls_unemployment_data.xlsx", firstrow clear
keep id_5digits Year Annual
rename id_5digits FIPS 
rename Year year
keep if year>=2007 & year<=2016
rename Annual ur
replace ur=ur/100 // unemployment rate

destring FIPS, gen(FIPS_num)
xtset FIPS_num year

gen urd=(ur-L.ur) // unemployment rate change
drop ur
drop FIPS_num
drop if year==2006

rename urd urd_county
save "$path_analysis/covariates_unemployment_rate_county.dta", replace

/*----------------------------------------------------------------------------*/

clear
import delimited "$path_raw/HPI_AT_BDL_tract.csv", varnames(1) stringcols(1) // tract-level housing price index
rename tract ctn
tostring ctn, replace force

drop state

* Make 2010 the benchmark year for the index
gen x=hpi if year==2010
bysort ctn: egen X=mean(x)
replace hpi=hpi/X*100
drop x X

keep if year>=2007 & year<=2016

keep ctn year hpi

rename ctn geoid_tract_20
joinby geoid_tract_20 using "$path_raw/census_crosswalk_2020_2010.dta" // crosswalk to 2010 census tract numbers

replace geoid_tract_20=geoid_tract_10
rename geoid_tract_20 ctn

sort ctn year
by ctn year: egen avg_hpi=mean(hpi)
by ctn year: keep if _n==1
drop hpi
rename avg_hpi hpi_ctn
keep ctn year hpi_ctn
save "$path/HPI_ctn.dta", replace


********************************************************************************

// If a HMDA-ctn does not have a HPI, use the ZIP-level HPI as a proxy

use "$path_analysis/hmda_ctn_year_analysis.dta", clear
merge 1:1 ctn year using "$path/HPI_ctn.dta"
keep if _merge==1
drop _merge
bysort ctn: keep if _n==1
keep ctn
drop if ustrpos(ctn,"NA")>0
save "$path/ctn_unmatched.dta", replace

use "$path_raw/HUD_TRACT_ZIP_crosswalk.dta", clear
merge m:1 ctn using "$path/ctn_unmatched.dta"

preserve
keep if _merge==3
drop _merge
* there are 30,891 obs (14,526 tracts) that have a zip code
save "$path/ctn_unmatched_w_zip.dta", replace
restore


keep if _merge==2
drop _merge
* there are still 13,953 obs (13,953 tracts) that don't have a zip
keep ctn
save "$path/ctn_unmatched_no_zip.dta", replace
/*----------------------------------------------------------------------------*/

clear
import delimited "$path_raw/HPI_AT_BDL_ZIP5.csv", varnames(1) stringcols(1) // zip-level housing price index
rename ïzip zip
replace zip="0"+zip if length(zip)==4

gen x=hpi if year==2010
bysort zip: egen X=mean(x)
replace hpi=hpi/X*100
drop x X

keep if year>=2007 & year<=2016

keep zip year hpi

save "$path/HPI_zip.dta", replace
/*----------------------------------------------------------------------------*/

use "$path/ctn_unmatched_w_zip.dta", clear
sort ctn zip
keep ctn zip RES_RATIO
drop if RES_RATIO==0
rename RES_RATIO w

joinby zip using "$path/HPI_zip.dta"

sort ctn year zip 
order ctn year zip
by ctn year: gen x=_N
replace w=1 if x==1

replace hpi=w*hpi
by ctn year: egen sum=sum(hpi)
replace hpi=sum
drop x sum
by ctn year: keep if _n==1

keep ctn year hpi

save "$path/ctn_unmatched_w_zip_w_HPI.dta", replace

/*----------------------------------------------------------------------------*/

use "$path/ctn_unmatched_no_zip.dta", clear
rename ctn ctn00
merge 1:m ctn00 using "$path_raw/census_crosswalk_2000_2010.dta"
keep if _merge==3
drop _merge
sort ctn00 ctn10
rename ctn00 ctn_hmda
rename ctn10 ctn

joinby ctn using "$path_raw/HUD_TRACT_ZIP_crosswalk.dta"
sort ctn_hmda ctn zip
drop BUS_RATIO OTH_RATIO TOT_RATIO

joinby zip using "$path/HPI_zip.dta"

sort ctn_hmda year ctn zip 
order ctn_hmda year ctn zip 

gen w=weight*RES_RATIO
by ctn_hmda year: gen x=_N
replace w=1 if x==1

replace hpi=w*hpi
by ctn_hmda year: egen sum=sum(hpi)
replace hpi=sum
drop x sum
by ctn_hmda year: keep if _n==1

keep ctn_hmda year hpi
rename ctn_hmda ctn

append using "$path/ctn_unmatched_w_zip_w_HPI.dta"
rename hpi hpi_zip
save "$path/HPI_ctn_zipproxy.dta", replace

/*----------------------------------------------------------------------------*/

// For the remaining unmatched, use the county-level HPI as a proxy

clear
import delimited "$path_raw/HPI_AT_BDL_county.csv", varnames(1) stringcols(1,2,3) // county-level housing price index

gen x=hpi if year==2010
bysort fips: egen X=mean(x)
replace hpi=hpi/X*100
drop x X

keep if year>=2007 & year<=2016

keep fips year hpi
rename hpi hpi_county
save "$path/HPI_county.dta", replace

***

use "$path/ctn_unmatched.dta", clear
merge 1:m ctn using "$path/HPI_ctn_zipproxy.dta"
keep if _merge==1
*4,248 remaining
drop _merge
drop year hpi_zip

gen fips=substr(ctn,1,5)
joinby fips using "$path/HPI_county.dta"
sort ctn year
drop fips
save "$path/HPI_ctn_countyproxy.dta", replace


********************************************************************************

use "$path/HPI_ctn.dta", clear
merge 1:1 ctn year using "$path/HPI_ctn_zipproxy.dta"
gen hpi=.
gen source=""
replace source="ctn" if _merge==1 | _merge==3
replace source="zip" if _merge==2
replace hpi=hpi_ctn if source=="ctn"
replace hpi=hpi_zip if source=="zip"
drop _merge
drop hpi_ctn hpi_zip

merge 1:1 ctn year using "$path/HPI_ctn_countyproxy.dta"
replace source="county" if _merge==2
replace hpi=hpi_county if source=="county"
drop hpi_county
drop _merge

gen logprice=log(hpi)

save "$path_analysis/covariates_HPI_ctn.dta", replace

erase "$path/HPI_ctn_zipproxy.dta"
erase "$path/HPI_ctn_countyproxy.dta"
erase "$path/HPI_ctn.dta"
erase "$path/HPI_zip.dta"
erase "$path/HPI_county.dta"
erase "$path/ctn_unmatched.dta"
erase "$path/ctn_unmatched_no_zip.dta"
erase "$path/ctn_unmatched_w_zip.dta"
erase "$path/ctn_unmatched_w_zip_w_HPI.dta"



